Objective: General Analysis
The data sets are relatively large and take up a large amount of memory once loaded. It is recommended that, after loading, the data sets be subsetted or summarised and that unused data frames be removed.
The code in the next three tabs is shown so that any changes made to the data are visible. In general, no columns or rows are removed. Filtering occurs in the next step.
library(dplyr)
library(ggplot2)
library(extrafont)
library(stringr)
loadfonts()
library(tidyr)
library(DT)
library(tidytext)
library(psych)
library(sf)
library(leaflet)
library(ggpubr)
# Plot defaults ----
# Every ggplot below starts from theme_minimal() with the minor grid lines
# removed and all text drawn in black Quicksand.
theme_set(
  theme_minimal() +
    theme(
      text = element_text(colour = "black", family = "Quicksand"),
      panel.grid.minor = element_blank()
    )
)
# Large scipen penalty: print numbers in fixed rather than scientific notation.
options(scipen = 999)
# Helper script; presumably defines occ_modifier(), which is applied to occstr
# when the census years are combined -- confirm against 0_OCC_Modifier.R.
source("0_OCC_Modifier.R")
# Loading ----
# 1850 census extract for Manhattan; `city` records the borough of each row.
mn_1850 <- readr::read_csv("../Data/census_1850_occ_mn.csv") %>%
  mutate(city = "Manhattan")
## Parsed with column specification:
## cols(
## .default = col_double(),
## occstr = col_character(),
## bplstr = col_character(),
## us1850c_1053 = col_character(),
## stdcity = col_character()
## )
## See spec(...) for full column specifications.
# Same extract for Brooklyn.
bk_1850 <- readr::read_csv("../Data/census_1850_occ_bk.csv") %>%
  mutate(city = "Brooklyn")
## Parsed with column specification:
## cols(
## .default = col_double(),
## occstr = col_character(),
## bplstr = col_character(),
## us1850c_1053 = col_character(),
## stdcity = col_character()
## )
## See spec(...) for full column specifications.
# Lookup table: numeric `code` -> `occ_label`.
OCC_1880 <- readr::read_csv("../Data/OCC1880.csv")
## Parsed with column specification:
## cols(
## code = col_double(),
## occ_label = col_character()
## )
# Lookup table: numeric `code` -> `occ1950_label`.
OCC_1950 <- readr::read_csv("../Data/OCC1950.csv")
## Parsed with column specification:
## cols(
## code = col_double(),
## occ1950_label = col_character()
## )
# Combining Data ----
# Stack both boroughs, keep the analysis columns, attach the 1880 and 1950
# occupation labels and decode the numeric sex / race / labforce codes.
combined_1850 <- rbind(mn_1850, bk_1850) %>%
  select(
    age, sex, race, labforce, occ, city, occ1950, occstr,
    enumdist, ward, ind1950
  ) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  mutate(
    year = 1850,
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female")),
    labforce = factor(
      labforce,
      levels = c(0, 1, 2),
      labels = c(
        "N/A",
        "No, not in the labor force",
        "Yes, in the labor force"
      )
    ),
    # codes 100 and 120 are both labelled "White"
    race = factor(
      race,
      levels = c(100, 120, 200, 210, 300),
      labels = c(
        "White", "White",
        "Black/African American/Negro",
        "Mulatto",
        "American Indian/Alaska Native (AIAN)"
      )
    )
  ) %>%
  # Data prepping for combining: labour force only, shared column layout,
  # plain character columns so the years can be stacked later
  filter(labforce == "Yes, in the labor force") %>%
  select(
    year, age, sex, race, city, enumdist,
    occstr, occ_label, occ1950_label, ward, ind1950
  ) %>%
  mutate(
    sex = as.character(sex),
    race = as.character(race)
  )
# Saving Memory: the raw borough tables are no longer needed
rm(bk_1850, mn_1850)
# Loading ----
# 1880 census extracts, one per borough, tagged with the borough name.
mn_1880 <- readr::read_csv("../Data/census_1880_occ_mn.csv") %>%
  mutate(city = "Manhattan")
bk_1880 <- readr::read_csv("../Data/census_1880_occ_bk.csv") %>%
  mutate(city = "Brooklyn")
# Combining Data ----
combined_1880 <- rbind(mn_1880, bk_1880) %>%
  select(
    age, sex, race, labforce, occ, city, occstr, occ1950, enumdist
  ) %>%
  left_join(OCC_1880, by = c("occ" = "code")) %>%
  mutate(
    year = 1880,
    # infants are recorded as text; recode them to 0 before converting
    age = as.numeric(ifelse(age == "Less than 1 year old", 0, age))
  ) %>%
  # Data prepping for combining
  filter(labforce == "Yes, in the labor force") %>%
  # NOTE(review): occ1950 is renamed to occ1950_label without an OCC_1950
  # join; this assumes the 1880 extract already stores a label -- confirm.
  select(
    year, age, sex, race, city, enumdist,
    occstr, occ_label, occ1950_label = occ1950
  )
rm(OCC_1880, bk_1880, mn_1880)
# Loading ----
# 1910 census extracts, one per borough, plus the 1950 occupation lookup.
mn_1910 <- readr::read_csv("../Data/census_1910_occ_mn.csv") %>%
  mutate(city = "Manhattan")
bk_1910 <- readr::read_csv("../Data/census_1910_occ_bk.csv") %>%
  mutate(city = "Brooklyn")
OCC_1950 <- readr::read_csv("../Data/OCC1950.csv")
# Combining Data ----
combined_1910 <- rbind(mn_1910, bk_1910) %>%
  left_join(OCC_1950, by = c("occ1950" = "code")) %>%
  select(
    age, sex, labor_force, race, occ1950, occstr,
    occ1950_label, city, enumdist
  ) %>%
  mutate(
    year = 1910,
    race = factor(
      race,
      levels = c(100, 140, 200, 210, 300, 400, 500, 600, 672),
      labels = c(
        "White", "Mexican (1930)",
        "Black/African American/Negro",
        "Mulatto",
        "American Indian/Alaska Native (AIAN)",
        "Chinese",
        "Japanese",
        "Filipino",
        "Asian, not specified"
      )
    ),
    labor_force = factor(
      labor_force,
      levels = c(0, 1, 2),
      labels = c(
        "N/A",
        "No, not in the labor force",
        "Yes, in the labor force"
      )
    ),
    sex = factor(sex, levels = c(1, 2), labels = c("Male", "Female"))
  ) %>%
  # Data prepping for combining
  filter(labor_force == "Yes, in the labor force") %>%
  select(
    year, age, sex, race, city, enumdist,
    occstr, occ_label = occ1950_label
  ) %>%
  # the 1950 label fills both label columns for this year
  mutate(
    occ1950_label = occ_label,
    enumdist = as.numeric(enumdist),
    race = as.character(race),
    sex = as.character(sex)
  )
rm(OCC_1950, mn_1910, bk_1910)
# Stack the three census years into a single labour-force table and pass the
# free-text occupation string through occ_modifier().
# The result must be named `combined`: that is the object written to disk
# below and used by every later summary.
combined <- bind_rows(
  combined_1850,
  combined_1880,
  combined_1910
) %>%
  mutate(occstr = occ_modifier(occstr))
# combined_1850 is deliberately kept: the 1850 ward maps later in this
# document read it directly for its `ward` column.
rm(combined_1880, combined_1910)
# Written to the same path the table is read back from in the next step.
readr::write_csv(combined, "../Data/combined.csv")
# Reload the combined labour-force table and treat the grouping columns as
# factors for the summaries and plots below.
combined <- readr::read_csv("../Data/combined.csv") %>%
  mutate(
    city = as.factor(city),
    race = as.factor(race),
    sex = as.factor(sex),
    year = as.factor(year)
  )
## Parsed with column specification:
## cols(
## year = col_double(),
## age = col_double(),
## sex = col_character(),
## race = col_character(),
## city = col_character(),
## enumdist = col_double(),
## occstr = col_character(),
## occ_label = col_character(),
## occ1950_label = col_character()
## )
# To reduce computation later: labour-force size per census year and overall,
# used as denominators for the percentage tables and plots.
var_lf_1850 <- nrow(filter(combined, year == 1850))
var_lf_1880 <- nrow(filter(combined, year == 1880))
var_lf_1910 <- nrow(filter(combined, year == 1910))
var_lf <- sum(var_lf_1850, var_lf_1880, var_lf_1910)
#' Most common occupations in the labour force
#'
#' Counts the rows of the global `combined` table per `occ1950_label`
#' (rows labelled "Not yet classified" are excluded) and returns the labels of
#' the `n` largest groups.
#'
#' @param n Number of occupation labels to return.
#' @param year_in `"all"` (default) to use every census year, or one of
#'   1850, 1880, 1910 (numeric or character) to restrict to that year.
#' @return The `occ1950_label` values of the (at most) `n` most frequent
#'   occupations, most frequent first.
top_n_occupations <- function(n, year_in = "all") {
  year_in <- as.character(year_in)
  valid_years <- c("1850", "1880", "1910")
  # Fail loudly on an unsupported year: previously this fell through and
  # returned NULL, which made callers' `%in%` filters silently match nothing.
  if (length(year_in) != 1 || !(year_in %in% c("all", valid_years))) {
    stop(
      "`year_in` must be \"all\" or one of ",
      paste(valid_years, collapse = ", "),
      call. = FALSE
    )
  }

  labour_force <- combined
  if (year_in != "all") {
    labour_force <- filter(labour_force, year == year_in)
  }

  labour_force %>%
    filter(occ1950_label != "Not yet classified") %>%
    count(occ1950_label, name = "count") %>%
    arrange(desc(count)) %>%
    head(n) %>%
    pull(occ1950_label)
}
# Brooklyn 1850 ward polygons; only the ward number and geometry are kept.
# (The name bk_1850 is reused here for the shapefile.)
bk_1850 <- st_read(
  "../Data/shpfiles/bk_shapefiles/Ward_1850_BK.shp",
  stringsAsFactors = FALSE
) %>%
  select(Ward_Num, geometry)
## Reading layer `Ward_1850_BK' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\bk_shapefiles\Ward_1850_BK.shp' using driver `ESRI Shapefile'
## Simple feature collection with 11 features and 3 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -8240555 ymin: 4958954 xmax: -8226921 ymax: 4969322
## epsg (SRID): 3857
## proj4string: +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# Manhattan 1850 ward polygons; only the ward number and geometry are kept.
mn_1850 <- st_read(
  "../Data/shpfiles/mn_shapefiles/Ward_1850_MN.shp",
  stringsAsFactors = FALSE
) %>%
  select(Ward_Num, geometry)
## Reading layer `Ward_1850_MN' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\mn_shapefiles\Ward_1850_MN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 19 features and 3 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -8239443 ymin: 4968339 xmax: -8227670 ymax: 4994306
## epsg (SRID): 3857
## proj4string: +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# One layer for both boroughs, reprojected to EPSG:4326.
shp_1850 <- st_transform(rbind(bk_1850, mn_1850), crs = 4326)
rm(bk_1850, mn_1850)
# Brooklyn 1880 enumeration-district polygons; keep ED and geometry only.
bk_1880 <- st_read(
  "../Data/shpfiles/bk_shapefiles/ED_1880_S4_BK.shp",
  stringsAsFactors = FALSE
) %>%
  select(ED, geometry)
## Reading layer `ED_1880_S4_BK' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\bk_shapefiles\ED_1880_S4_BK.shp' using driver `ESRI Shapefile'
## Simple feature collection with 250 features and 152 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -8240479 ymin: 4958979 xmax: -8226015 ymax: 4973979
## epsg (SRID): 3857
## proj4string: +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# Manhattan 1880 enumeration-district polygons; `ed80` is renamed to `ED` so
# the layer can be stacked with the Brooklyn one.
mn_1880 <- st_read(
  "../Data/shpfiles/mn_shapefiles/ED_1880_MN.shp",
  stringsAsFactors = FALSE
) %>%
  select(ED = ed80, geometry)
## Reading layer `ED_1880_MN' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\mn_shapefiles\ED_1880_MN.shp' using driver `ESRI Shapefile'
## Simple feature collection with 662 features and 41 fields
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: -8239612 ymin: 4968158 xmax: -8227670 ymax: 4994306
## epsg (SRID): 3857
## proj4string: +proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs
# One layer for both boroughs, reprojected to EPSG:4326.
shp_1880 <- st_transform(rbind(bk_1880, mn_1880), crs = 4326)
rm(bk_1880, mn_1880)
# Brooklyn 1910 enumeration-district polygons (all fields kept).
bk_1910 <- st_read(
  "../Data/shpfiles/bk_shapefiles/Brooklyn_1910.shp",
  stringsAsFactors = FALSE
)
## Reading layer `Brooklyn_1910' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\bk_shapefiles\Brooklyn_1910.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1112 features and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 1825539 ymin: 556929.4 xmax: 1841193 ymax: 576192.4
## epsg (SRID): NA
## proj4string: +proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
# Manhattan 1910 enumeration-district polygons (all fields kept).
mn_1910 <- st_read(
  "../Data/shpfiles/mn_shapefiles/Manhattan_1910.shp",
  stringsAsFactors = FALSE
)
## Reading layer `Manhattan_1910' from data source `C:\Users\Clinton\Documents\GitHub\hnyc_occupations\Data\shpfiles\mn_shapefiles\Manhattan_1910.shp' using driver `ESRI Shapefile'
## Simple feature collection with 1480 features and 1 field
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: 1823260 ymin: 568821.3 xmax: 1831540 ymax: 591889.7
## epsg (SRID): NA
## proj4string: +proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +datum=NAD83 +units=m +no_defs
# One layer for both boroughs; ED is made numeric so it can be joined to the
# census `enumdist` column, then the layer is reprojected to EPSG:4326.
shp_1910 <- rbind(bk_1910, mn_1910) %>%
  mutate(ED = as.numeric(ED)) %>%
  st_transform(crs = 4326)
rm(bk_1910, mn_1910)
# Age distribution of the whole labour force in 5-year bins; the x axis is
# limited to 0-100 with a tick every 10 years.
ggplot(combined, aes(x = age)) +
  geom_histogram(
    binwidth = 5,
    colour = "#FFA600",
    fill = "#FBD75C"
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, 10),
    limits = c(0, 100)
  ) +
  labs(
    title = "Histogram of Age of the Labor Force",
    x = "Age",
    y = "Count"
  )
# Descriptive statistics for age across all census years.
psych::describe(combined$age)
## vars n mean sd median trimmed mad min max range skew
## X1 1 2712978 33.04 12.75 30 31.74 13.34 16 126 110 0.85
## kurtosis se
## X1 0.24 0.01
# Three-colour palette, one colour per census year.
col_3 <- c("#ffa600", "#bc5090", "#003f5c")
# Age histogram (5-year bins) drawn separately for each census year.
ggplot(combined, aes(x = age, fill = year)) +
  geom_histogram(binwidth = 5) +
  scale_fill_manual(values = col_3) +
  facet_wrap(~year) +
  labs(
    title = "Histogram of Age of Labor Force across Time Periods",
    x = "Age",
    y = "Count"
  ) +
  theme(legend.position = "none")
# Descriptive statistics for age, reported separately per census year.
describeBy(
  select(combined, age, year),
  group = combined$year
)
##
## Descriptive statistics by group
## group: 1850
## vars n mean sd median trimmed mad min max range skew
## age 1 185032 32.04 11.11 30 30.9 10.38 16 126 110 1.01
## year* 2 185032 1.00 0.00 1 1.0 0.00 1 1 0 NaN
## kurtosis se
## age 1.06 0.03
## year* NaN 0.00
## --------------------------------------------------------
## group: 1880
## vars n mean sd median trimmed mad min max range skew
## age 1 705712 33.4 12.97 30 32.14 13.34 16 108 92 0.78
## year* 2 705728 2.0 0.00 2 2.00 0.00 2 2 0 NaN
## kurtosis se
## age 0.03 0.02
## year* NaN 0.00
## --------------------------------------------------------
## group: 1910
## vars n mean sd median trimmed mad min max range skew
## age 1 1822234 33 12.82 30 31.68 13.34 16 104 88 0.86
## year* 2 1822234 3 0.00 3 3.00 0.00 3 3 0 NaN
## kurtosis se
## age 0.26 0.01
## year* NaN 0.00
# Two-colour palette used for the two-category plots (sex, borough).
col_2 <- c("#FF5959", "#003A6F")
# Number of workers of each sex, one panel per census year.
combined %>%
  count(year, sex, name = "count") %>%
  ggplot(aes(x = sex, y = count, fill = sex)) +
  geom_col() +
  geom_text(
    aes(label = count),
    vjust = -0.5,
    family = "Quicksand"
  ) +
  scale_fill_manual(values = col_2) +
  facet_wrap(~year) +
  labs(
    title = "Graph of Sex of Labor Force across Time Periods",
    x = "Sex",
    y = "Count"
  ) +
  theme(legend.position = "none")
# Share of each sex within each census year's labour force.
combined %>%
  group_by(year, sex) %>%
  summarise(count = n()) %>%
  # Within a year the group counts add up to that year's labour-force size,
  # so no hard-coded per-year denominator is needed.
  group_by(year) %>%
  mutate(perc = round(count / sum(count) * 100, digits = 1)) %>%
  ggplot(aes(y = perc, x = sex, fill = sex)) +
  geom_col() +
  geom_text(
    aes(label = paste0(perc, "%")),
    family = "Quicksand",
    vjust = -0.5
  ) +
  facet_wrap(~year) +
  scale_fill_manual(values = col_2) +
  # the bars show percentages, so the axis is labelled accordingly
  labs(
    y = "Percent", x = "Sex",
    title = "Graph of Sex of Percentage of Labor Force across Time Periods"
  ) +
  theme(legend.position = "none")
# Number of workers per race category, one panel per census year.
# Bars are horizontal and the count is printed at the end of each bar; the
# extra 50% of space past the largest bar leaves room for those labels.
combined %>%
  count(year, race, name = "count") %>%
  ggplot(aes(x = race, y = count, fill = race)) +
  geom_col() +
  geom_text(
    aes(label = count),
    hjust = 0,
    family = "Quicksand"
  ) +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  coord_flip() +
  facet_wrap(~year, scales = "free_x") +
  labs(
    title = "Graph of Race of Labor Force across Time Periods",
    x = "Race",
    y = "Count"
  ) +
  theme(
    axis.text.x = element_text(size = 7),
    legend.position = "none"
  )
# Share of each race category within each census year's labour force.
combined %>%
  group_by(year, race) %>%
  summarise(count = n()) %>%
  # Within a year the group counts add up to that year's labour-force size,
  # so no hard-coded per-year denominator is needed.
  group_by(year) %>%
  mutate(perc = round(count / sum(count) * 100, digits = 1)) %>%
  ggplot(aes(y = perc, x = race, fill = race)) +
  geom_col() +
  geom_text(
    aes(label = paste0(perc, "%")),
    family = "Quicksand",
    hjust = 0
  ) +
  facet_wrap(~year, scales = "free_x") +
  coord_flip() +
  # extra space past the longest bar leaves room for the text labels
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  # the bars show percentages, so the axis is labelled accordingly
  labs(
    y = "Percent", x = "Race",
    title = "Graph of Percentage of Race of Labor Force across Time Periods"
  ) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 7)
  )
# Number of workers per borough, one panel per census year (horizontal bars
# with the count printed at the end of each bar).
combined %>%
  count(year, city, name = "count") %>%
  ggplot(aes(x = city, y = count, fill = city)) +
  geom_col() +
  geom_text(
    aes(label = count),
    hjust = 0,
    family = "Quicksand"
  ) +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_manual(values = col_2) +
  coord_flip() +
  facet_wrap(~year) +
  labs(
    title = "Graph of Borough of Labor Force across Time Periods",
    x = "Borough",
    y = "Count"
  ) +
  theme(legend.position = "none")
# Share of each borough within each census year's labour force.
combined %>%
  group_by(year, city) %>%
  summarise(count = n()) %>%
  # Within a year the group counts add up to that year's labour-force size,
  # so no hard-coded per-year denominator is needed.
  group_by(year) %>%
  mutate(perc = round(count / sum(count) * 100, digits = 1)) %>%
  ggplot(aes(y = perc, x = city, fill = city)) +
  geom_col() +
  geom_text(
    aes(label = paste0(perc, "%")),
    family = "Quicksand",
    hjust = 0
  ) +
  facet_wrap(~year) +
  coord_flip() +
  scale_fill_manual(values = col_2) +
  # extra space past the longest bar leaves room for the text labels
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  # the bars show percentages, so the axis is labelled accordingly
  labs(
    y = "Percent", x = "Borough",
    title = "Graph of Percent of Borough of Labor Force across Time Periods"
  ) +
  theme(legend.position = "none")
# Occupation frequency tables: count and share of the labour force per
# occ1950_label, most frequent first, for all years and then per census year.
# Only the first 200 rows are shown because datatable can only handle a
# limited amount of data.

# All years
combined %>%
  count(occ1950_label, name = "count") %>%
  mutate(perc = round(count / var_lf * 100, digits = 2)) %>%
  arrange(desc(count)) %>%
  head(200) %>%
  datatable()
# 1850
combined %>%
  filter(year == 1850) %>%
  count(occ1950_label, name = "count") %>%
  mutate(perc = round(count / var_lf_1850 * 100, digits = 2)) %>%
  arrange(desc(count)) %>%
  head(200) %>%
  datatable()
# 1880
combined %>%
  filter(year == 1880) %>%
  count(occ1950_label, name = "count") %>%
  mutate(perc = round(count / var_lf_1880 * 100, digits = 2)) %>%
  arrange(desc(count)) %>%
  head(200) %>%
  datatable()
# 1910
combined %>%
  filter(year == 1910) %>%
  count(occ1950_label, name = "count") %>%
  mutate(perc = round(count / var_lf_1910 * 100, digits = 2)) %>%
  arrange(desc(count)) %>%
  head(200) %>%
  datatable()
# Ten most common occupations in each census year, by head count.
# reorder_within() / scale_x_reordered() sort the bars inside each panel.
combined %>%
  count(year, occ1950_label, name = "count") %>%
  group_by(year) %>%
  top_n(10, wt = count) %>%
  ggplot(
    aes(
      x = reorder_within(occ1950_label, count, year),
      y = count,
      fill = year
    )
  ) +
  geom_col() +
  geom_text(
    aes(label = count),
    hjust = 0,
    family = "Quicksand"
  ) +
  scale_x_reordered() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_manual(values = col_3) +
  coord_flip() +
  facet_wrap(~year, scales = "free_y") +
  labs(
    title = "Graph of Most Popular Occupations across Time Periods",
    x = "Occupation",
    y = "Count"
  ) +
  theme(legend.position = "none")
# Ten most common occupations in each census year, as a share of that year's
# labour force.
combined %>%
  group_by(year, occ1950_label) %>%
  summarise(count = n()) %>%
  group_by(year) %>%
  top_n(10, wt = count) %>%
  # Only the top ten rows per year remain, so the denominator has to be the
  # precomputed labour-force size of the year rather than sum(count).
  mutate(
    perc = case_when(
      year == 1850 ~ count / var_lf_1850,
      year == 1880 ~ count / var_lf_1880,
      year == 1910 ~ count / var_lf_1910
    ),
    perc = round(perc * 100, digits = 1)
  ) %>%
  ggplot(aes(
    y = perc,
    x = reorder_within(occ1950_label, perc, year),
    fill = year
  )) +
  geom_col() +
  geom_text(
    aes(label = paste0(perc, "%")),
    family = "Quicksand",
    hjust = 0
  ) +
  facet_wrap(~year, scales = "free_y") +
  scale_x_reordered() +
  scale_y_continuous(expand = expand_scale(mult = c(0, 0.5))) +
  scale_fill_manual(values = col_3) +
  coord_flip() +
  # the bars show percentages, so the axis is labelled accordingly
  labs(
    y = "Percent", x = "Occupation",
    title = "Graph of Most Popular Occupations across Time Periods as Percentage of Labor Force"
  ) +
  theme(legend.position = "none")
Occupation
# 1850: workers per ward in each of the ten most common occupations,
# one map panel per occupation.
combined_1850 %>%
  filter(
    year == 1850,
    occ1950_label %in% top_n_occupations(10, 1850)
  ) %>%
  group_by(occ1950_label, ward) %>%
  summarise(count = n()) %>%
  # polygons on the left; wards without a match give NA rows, dropped next
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_c() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised by size of each ward
# 1850: share of each ward's labour force working in each of the ten most
# common occupations, one map panel per occupation.
combined_1850 %>%
  # ward_size = all workers in the ward, counted before the occupation filter
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1850)) %>%
  group_by(occ1950_label, ward, ward_size) %>%
  summarise(count = n()) %>%
  mutate(count = count / ward_size * 100) %>%
  # polygons on the left; wards without a match give NA rows, dropped next
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_c() +
  # the fill is a percentage of the ward, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Grouped
# 1850: workers per ward in the ten most common occupations, with the counts
# binned into five bands (upper bounds inclusive).
combined_1850 %>%
  filter(
    year == 1850,
    occ1950_label %in% top_n_occupations(10, 1850)
  ) %>%
  group_by(occ1950_label, ward) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count,
      breaks = c(-Inf, 500, 1000, 1500, 2000, Inf),
      labels = c(
        " 0 - 500", "501 - 1000",
        "1001 - 1500", "1501 - 2000",
        "2001 - Inf"
      )
    )
  ) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_d() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised and grouped by size of each ward
# 1850: share of each ward's labour force in the ten most common occupations,
# binned into five percentage bands (upper bounds inclusive).
combined_1850 %>%
  # ward_size = all workers in the ward, counted before the occupation filter
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1850)) %>%
  group_by(occ1950_label, ward, ward_size) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count / ward_size * 100,
      breaks = c(-Inf, 5, 10, 15, 20, 100),
      labels = c(
        " 0 - 5", " 6 - 10",
        "11 - 15", "16 - 20",
        "21 - 100"
      )
    )
  ) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_d() +
  # the fill is a percentage band, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Race
# 1850: workers per ward for each race category, one map panel per category.
combined_1850 %>%
  group_by(race, ward) %>%
  summarise(count = n()) %>%
  # polygons on the left; wards without a match give NA rows, dropped next
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_c() +
  facet_wrap(~race, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised by size of each ward
# 1850: share of each ward's labour force belonging to each race category,
# one map panel per category.
combined_1850 %>%
  # ward_size = all workers in the ward
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  group_by(race, ward, ward_size) %>%
  summarise(count = n()) %>%
  mutate(count = count / ward_size * 100) %>%
  # polygons on the left; wards without a match give NA rows, dropped next
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_c() +
  # the fill is a percentage of the ward, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Grouped
# 1850: workers per ward for each race category, with the counts binned into
# five bands (upper bounds inclusive).
combined_1850 %>%
  group_by(race, ward) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count,
      breaks = c(-Inf, 5000, 7500, 10000, 12500, Inf),
      labels = c(
        " 0 - 5000", " 5001 - 7500",
        " 7501 - 10000", "10001 - 12500",
        "12501 - Inf"
      )
    )
  ) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_d() +
  facet_wrap(~race, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised and grouped by size of each ward
# 1850: share of each ward's labour force per race category, binned into five
# percentage bands (upper bounds inclusive).
combined_1850 %>%
  # ward_size = all workers in the ward
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  group_by(race, ward, ward_size) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count / ward_size * 100,
      breaks = c(-Inf, 80, 85, 90, 95, 100),
      labels = c(
        " 0 - 80", " 81 - 85",
        "86 - 90", "91 - 95",
        "96 - 100"
      )
    )
  ) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_d() +
  # the fill is a percentage band, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Occupation
# 1880: workers per enumeration district in each of the ten most common
# occupations, one map panel per occupation.
combined %>%
  filter(
    year == 1880,
    occ1950_label %in% top_n_occupations(10, 1880)
  ) %>%
  group_by(occ1950_label, enumdist) %>%
  summarise(count = n()) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_c() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised by size of each enumeration district
# 1880: share of each enumeration district's labour force working in each of
# the ten most common occupations, one map panel per occupation.
combined %>%
  filter(year == 1880) %>%
  # enum_size = all workers in the district, counted before the occupation
  # filter
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1880)) %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(count = count / enum_size * 100) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_c() +
  # the fill is a percentage of the district, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Grouped
# 1880: workers per enumeration district in the ten most common occupations,
# with the counts binned into five bands (upper bounds inclusive).
combined %>%
  filter(
    year == 1880,
    occ1950_label %in% top_n_occupations(10, 1880)
  ) %>%
  group_by(occ1950_label, enumdist) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count,
      breaks = c(-Inf, 100, 200, 300, 400, Inf),
      labels = c(
        " 0 - 100", "101 - 200",
        "201 - 300", "301 - 400",
        "401 - Inf"
      )
    )
  ) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_d() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Grouped and Normalised by size of each enumeration district
# 1880: share of each enumeration district's labour force in the ten most
# common occupations, binned into five percentage bands (upper bounds
# inclusive).
combined %>%
  filter(year == 1880) %>%
  # enum_size = all workers in the district, counted before the occupation
  # filter
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1880)) %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count / enum_size * 100,
      breaks = c(-Inf, 15, 30, 45, 60, 100),
      labels = c(
        " 0 - 15", "16 - 30",
        "31 - 45", "46 - 60",
        "61 - 100"
      )
    )
  ) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_d() +
  # the fill is a percentage band, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Race
# 1880: workers per enumeration district for each race category, one map
# panel per category.
combined %>%
  filter(year == 1880) %>%
  group_by(race, enumdist) %>%
  summarise(count = n()) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_c() +
  facet_wrap(~race, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised by size of each enumeration district
# 1880: share of each enumeration district's labour force belonging to each
# race category, one map panel per category.
combined %>%
  filter(year == 1880) %>%
  # enum_size = all workers in the district
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  group_by(race, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(count = count / enum_size * 100) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_c() +
  # the fill is a percentage of the district, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Grouped
# 1880: workers per enumeration district for each race category, with the
# counts binned into five bands (upper bounds inclusive).
combined %>%
  filter(year == 1880) %>%
  group_by(race, enumdist) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count,
      breaks = c(-Inf, 500, 1000, 1500, 2000, Inf),
      labels = c(
        " 0 - 500", " 501 - 1000",
        "1001 - 1500", "1501 - 2000",
        "2001 - Inf"
      )
    )
  ) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_d() +
  facet_wrap(~race, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Grouped and Normalised by size of each enumeration district
# 1880: share of each enumeration district's labour force per race category,
# binned into five percentage bands (upper bounds inclusive).
combined %>%
  filter(year == 1880) %>%
  # enum_size = all workers in the district
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  group_by(race, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count / enum_size * 100,
      breaks = c(-Inf, 20, 40, 60, 80, 100),
      labels = c(
        " 0 - 20", "21 - 40",
        "41 - 60", "61 - 80",
        "81 - 100"
      )
    )
  ) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_d() +
  # the fill is a percentage band, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
# Interactive map of 1910 labour-force size per enumeration district.
# Head count per district, joined onto the 1910 polygons.
sub <- combined %>%
  filter(year == 1910) %>%
  group_by(enumdist) %>%
  summarise(count = n()) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist"))
# Colour bands of width 500 from 0 to 3000.
# NOTE(review): counts above 3000 would fall outside these bins -- confirm
# the data never exceed that.
bins <- seq(0, 3000, by = 500)
pal <- colorBin("YlOrRd", domain = sub$count, bins = bins)
# One HTML hover label per polygon.
label <- lapply(paste("<b>Count:</b>", sub$count), htmltools::HTML)
leaflet(sub) %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(lng = -73.9500, lat = 40.7128, zoom = 11) %>%
  addPolygons(
    label = label,
    color = "black",
    weight = 0.25,
    fillColor = ~pal(count),
    fillOpacity = 0.7
  ) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~count
  )
Occupation
# 1910: workers per enumeration district in each of the ten most common
# occupations, one map panel per occupation.
combined %>%
  filter(
    year == 1910,
    occ1950_label %in% top_n_occupations(10, 1910)
  ) %>%
  group_by(occ1950_label, enumdist) %>%
  summarise(count = n()) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_c() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Normalised by size of each enumeration district
# 1910: share of each enumeration district's labour force working in each of
# the ten most common occupations, one map panel per occupation.
combined %>%
  filter(year == 1910) %>%
  # enum_size = all workers in the district, counted before the occupation
  # filter
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1910)) %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(count = count / enum_size * 100) %>%
  # polygons on the left; districts without a match give NA rows, dropped next
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), colour = "black", size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_c() +
  # the fill is a percentage of the district, not a head count
  labs(fill = "Percent") +
  theme(
    legend.position = "right",
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.key.height = unit(2, "line"),
    legend.key.width = unit(1, "line"),
    panel.grid = element_blank()
  )
Grouped
# 1910: workers per enumeration district in the ten most common occupations,
# with the counts binned into five bands (upper bounds inclusive).
combined %>%
  filter(
    year == 1910,
    occ1950_label %in% top_n_occupations(10, 1910)
  ) %>%
  group_by(occ1950_label, enumdist) %>%
  summarise(count = n()) %>%
  mutate(
    count = cut(
      count,
      breaks = c(-Inf, 100, 200, 300, 400, Inf),
      labels = c(
        " 0 - 100", "101 - 200",
        "201 - 300", "301 - 400",
        "401 - Inf"
      )
    )
  ) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = count), size = 0.25, colour = "black") +
  scale_fill_viridis_d() +
  facet_wrap(~occ1950_label, nrow = 2) +
  labs(fill = "Count") +
  theme(
    panel.grid = element_blank(),
    axis.text = element_blank(),
    strip.text.x = element_text(size = 7),
    legend.position = "right",
    legend.key.width = unit(1, "line"),
    legend.key.height = unit(2, "line")
  )
Grouped and Normalised by size of each enumeration district
# Banded choropleth of each occupation's share (in percent) of all 1910
# records in an enumeration district, one facet per occupation returned by
# top_n_occupations(10, 1910).
combined %>%
  filter(year == 1910) %>%
  # Denominator: every 1910 row of `combined` in the district, counted before
  # the occupation filter is applied.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label %in% top_n_occupations(10, 1910)) %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed 20-point bands; anything above 100 would become NA.
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 20, 40, 60, 80, 100),
                        labels = c(" 0 - 20", "21 - 40",
                                   "41 - 60", "61 - 80",
                                   "81 - 100"))) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  # Districts without any matching record come back with an NA label.
  filter(!is.na(occ1950_label)) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  facet_wrap(~occ1950_label, nrow = 2) +
  scale_fill_viridis_d() +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
Race
# Choropleth of raw record counts per 1910 enumeration district, one facet per
# value of `race`.
combined %>%
  filter(year == 1910) %>%
  group_by(race, enumdist) %>%
  summarise(count = n()) %>%
  # shp_1910 goes on the left so every district row (and its geometry) is kept.
  left_join(x = shp_1910, y = ., by = c("ED" = "enumdist")) %>%
  # Districts with no census rows at all come back with an NA race; drop them
  # so they do not create an extra "NA" facet.
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  scale_fill_viridis_c() +
  facet_wrap(vars(race), nrow = 2) +
  labs(fill = "Count") +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
Normalised by size of each enumeration district
# Choropleth of each race category's share (in percent) of all 1910 records in
# an enumeration district, one facet per value of `race`.
combined %>%
  filter(year == 1910) %>%
  # Denominator: every 1910 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  # enum_size is constant within a district; grouping by it simply carries the
  # column through summarise().
  group_by(race, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  mutate(pct = count / enum_size * 100) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  # Districts with no census rows come back from the join with an NA race.
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = pct),
          colour = "black",
          size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_c() +
  # The fill is a percentage of the district, not a head count.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
Grouped
# Race map with the per-district counts binned into five fixed bands of 500.
combined %>%
  filter(year == 1910) %>%
  group_by(race, enumdist) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 500], (500, 1000], ..., (2000, Inf].
  mutate(count = cut(count,
                     breaks = c(-Inf, 500, 1000, 1500, 2000, Inf),
                     labels = c(" 0 - 500", " 501 - 1000",
                                "1001 - 1500", "1501 - 2000",
                                "2001 - Inf"))) %>%
  left_join(x = shp_1910, y = ., by = c("ED" = "enumdist")) %>%
  # Drop districts that matched no census rows (NA race after the join).
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  scale_fill_viridis_d() +
  facet_wrap(vars(race), nrow = 2) +
  labs(fill = "Count") +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
Grouped and Normalised by size of each enumeration district
# Banded choropleth of each race category's share (in percent) of all 1910
# records in an enumeration district, one facet per value of `race`.
combined %>%
  filter(year == 1910) %>%
  # Denominator: every 1910 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  group_by(race, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed 20-point bands; anything above 100 would become NA.
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 20, 40, 60, 80, 100),
                        labels = c(" 0 - 20", "21 - 40",
                                   "41 - 60", "61 - 80",
                                   "81 - 100"))) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  # Districts with no census rows come back from the join with an NA race.
  filter(!is.na(race)) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  facet_wrap(~race, nrow = 2) +
  scale_fill_viridis_d() +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# Build a year x occupation lookup covering every occupation that
# top_n_occupations(10, <year>) returns for 1850, 1880 or 1910.
#
# Result: a data.frame with one row per (year, occ1950_label) pair and a `top`
# flag that is 1 when the occupation is in that year's list and 0 otherwise.
# Rows are ordered by year (all 1850 rows, then 1880, then 1910).
#
# Each yearly list is computed once and reused, rather than calling
# top_n_occupations() again for every case_when() branch. local() keeps the
# intermediate vectors out of the global environment.
# NOTE(review): assumes top_n_occupations() returns the same vector when
# called repeatedly with the same arguments - confirm in 0_OCC_Modifier.R.
top_10_occ_combined <- local({
  top_1850 <- top_n_occupations(10, 1850)
  top_1880 <- top_n_occupations(10, 1880)
  top_1910 <- top_n_occupations(10, 1910)

  # Union of the three lists, in first-seen order.
  all_labels <- unique(c(top_1850, top_1880, top_1910))

  data.frame(year = rep(c(1850, 1880, 1910),
                        each = length(all_labels)),
             occ1950_label = rep(all_labels, 3),
             stringsAsFactors = FALSE) %>%
    mutate(top = case_when(
      year == 1850 & occ1950_label %in% top_1850 ~ 1,
      year == 1880 & occ1950_label %in% top_1880 ~ 1,
      year == 1910 & occ1950_label %in% top_1910 ~ 1,
      TRUE ~ 0
    ))
})
# Bar chart of record counts per census year for every occupation listed in
# top_10_occ_combined, one facet per occupation.
#  - fill:   red when the first year's count exceeds the last year's (a
#            decline), green otherwise.
#  - border: black when the occupation is flagged `top` for that year.
combined %>%
  # Convert year to numeric so it joins against the numeric `year` column of
  # top_10_occ_combined (presumably it is stored as a factor - TODO confirm).
  mutate(year = as.numeric(as.character(year))) %>%
  filter(occ1950_label %in% top_10_occ_combined$occ1950_label) %>%
  group_by(year, occ1950_label) %>%
  summarise(count = n()) %>%
  # Joining onto the full year x occupation grid keeps a row for every
  # combination; combinations with no records get an NA count.
  left_join(top_10_occ_combined, .,
            by = c("year", "occ1950_label")) %>%
  group_by(occ1950_label) %>%
  # first()/last() rely on the grid being ordered by year (1850 ... 1910).
  # NOTE(review): if an occupation has no records in the first or last year,
  # `difference` is NA and the facet falls outside the fill scale.
  mutate(difference = first(count) - last(count),
         colour = ifelse(difference > 0,
                         1,
                         0)) %>%
  ggplot(aes(y = count, x = year)) +
  geom_col(aes(fill = factor(colour),
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occ1950_label, difference),
             scales = "free_y") +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  scale_fill_manual(values = c("#56c1ab", "#ee4c58")) +
  scale_color_manual(values = c("white", "black")) +
  # x carries the census year and y the number of records; the original
  # labels had these swapped.
  labs(x = "Year", y = "Count", col = "Top 10",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders mark occupations that were a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
# Bar chart of each occupation's count per census year, expressed as a
# percentage of var_lf_<year>, for every occupation listed in
# top_10_occ_combined. One facet per occupation.
#  - fill:   red when the first year's percentage exceeds the last year's (a
#            decline), green otherwise.
#  - border: black when the occupation is flagged `top` for that year.
combined %>%
  # Convert year to numeric so it joins against the numeric `year` column of
  # top_10_occ_combined (presumably it is stored as a factor - TODO confirm).
  mutate(year = as.numeric(as.character(year))) %>%
  filter(occ1950_label %in% top_10_occ_combined$occ1950_label) %>%
  group_by(year, occ1950_label) %>%
  summarise(count = n()) %>%
  # var_lf_1850 / var_lf_1880 / var_lf_1910 are defined elsewhere in the file
  # (presumably the labour-force totals for each year - TODO confirm).
  mutate(count = case_when(
    year == 1850 ~ count / var_lf_1850 * 100,
    year == 1880 ~ count / var_lf_1880 * 100,
    year == 1910 ~ count / var_lf_1910 * 100
  )) %>%
  # Joining onto the full year x occupation grid keeps a row for every
  # combination; combinations with no records get an NA value.
  left_join(top_10_occ_combined, .,
            by = c("year", "occ1950_label")) %>%
  group_by(occ1950_label) %>%
  # first()/last() rely on the grid being ordered by year (1850 ... 1910).
  # NOTE(review): if an occupation has no records in the first or last year,
  # `difference` is NA and the facet falls outside the fill scale.
  mutate(difference = first(count) - last(count),
         colour = ifelse(difference > 0,
                         1,
                         0)) %>%
  ggplot(aes(y = count, x = year)) +
  geom_col(aes(fill = factor(colour),
               col = factor(top)),
           size = 1) +
  facet_wrap(~reorder(occ1950_label, difference),
             scales = "free_y") +
  scale_x_continuous(breaks = c(1850, 1880, 1910)) +
  scale_fill_manual(values = c("#56c1ab", "#ee4c58")) +
  scale_color_manual(values = c("white", "black")) +
  # x carries the census year and y a percentage; the original labels had the
  # axes swapped and called the value a count.
  labs(x = "Year", y = "Percent", col = "Top 10",
       title = "Graph of Occupation Counts in Top 10 Occupations from 1850, 1880 and 1910",
       caption = "Note: Black borders mark occupations that were a top 10 occupation in that time\nperiod. Occupations are ordered by the size of change from 1850 to 1910") +
  theme(legend.position = "none")
Operative and kindred workers (n.e.c.)
# 1850 ward-level map: number of records labelled
# "Operative and kindred workers (n.e.c.)" in each ward.
tmp_map_1850 <- combined_1850 %>%
  filter(year == 1850) %>%
  filter(occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  group_by(ward) %>%
  summarise(count = n()) %>%
  # Wards without a matching record stay on the map with an NA count.
  left_join(x = shp_1850, y = ., by = c("Ward_Num" = "ward")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# 1880 enumeration-district map: number of records labelled
# "Operative and kindred workers (n.e.c.)" in each district.
tmp_map_1880 <- combined %>%
  filter(year == 1880) %>%
  filter(occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  group_by(enumdist) %>%
  summarise(count = n()) %>%
  # Districts without a matching record stay on the map with an NA count.
  left_join(x = shp_1880, y = ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# 1910 enumeration-district map: number of records labelled
# "Operative and kindred workers (n.e.c.)" in each district.
tmp_map_1910 <- combined %>%
  filter(year == 1910) %>%
  filter(occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  group_by(enumdist) %>%
  summarise(count = n()) %>%
  # Districts without a matching record stay on the map with an NA count.
  left_join(x = shp_1910, y = ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# Lay the three yearly count maps out in a single row, tagging each panel
# with its census year.
ggarrange(plotlist = list(tmp_map_1850, tmp_map_1880, tmp_map_1910),
          nrow = 1,
          labels = c("1850", "1880", "1910"),
          font.label = list(family = "Quicksand", face = "plain", size = 12))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
Operative and kindred workers (n.e.c.) Normalised
# 1850 ward-level map: "Operative and kindred workers (n.e.c.)" as a
# percentage of all rows of combined_1850 in the ward, binned into five bands.
tmp_map_1850 <- combined_1850 %>%
  # Ward size is computed before filtering, so the denominator is every row
  # of combined_1850 in that ward.
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  filter(year == 1850,
         occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  # ward_size is constant within a ward; grouping by it carries the column
  # through summarise().
  group_by(ward, ward_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  mutate(pct_band = cut(count / ward_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the ward, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# 1880 enumeration-district map: "Operative and kindred workers (n.e.c.)" as
# a percentage of all 1880 records in the district, binned into five bands.
tmp_map_1880 <- combined %>%
  filter(year == 1880) %>%
  # Denominator: every 1880 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  # The fourth band previously ended at 25, which contradicted its
  # "16 - 20" label and the cut-offs used by the 1850 and 1910 maps.
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# 1910 enumeration-district map: "Operative and kindred workers (n.e.c.)" as
# a percentage of all 1910 records in the district, binned into five bands.
tmp_map_1910 <- combined %>%
  filter(year == 1910) %>%
  # Denominator: every 1910 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label == "Operative and kindred workers (n.e.c.)") %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# Lay the three yearly normalised maps out in a single row, tagging each
# panel with its census year.
ggarrange(plotlist = list(tmp_map_1850, tmp_map_1880, tmp_map_1910),
          nrow = 1,
          labels = c("1850", "1880", "1910"),
          font.label = list(family = "Quicksand", face = "plain", size = 12))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
Managers, officials, and proprietors (n.e.c.)
# 1850 ward-level map: number of records labelled
# "Managers, officials, and proprietors (n.e.c.)" in each ward.
tmp_map_1850 <- combined_1850 %>%
  filter(year == 1850) %>%
  filter(occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  group_by(ward) %>%
  summarise(count = n()) %>%
  # Wards without a matching record stay on the map with an NA count.
  left_join(x = shp_1850, y = ., by = c("Ward_Num" = "ward")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# 1880 enumeration-district map: number of records labelled
# "Managers, officials, and proprietors (n.e.c.)" in each district.
tmp_map_1880 <- combined %>%
  filter(year == 1880) %>%
  filter(occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  group_by(enumdist) %>%
  summarise(count = n()) %>%
  # Districts without a matching record stay on the map with an NA count.
  left_join(x = shp_1880, y = ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# 1910 enumeration-district map: number of records labelled
# "Managers, officials, and proprietors (n.e.c.)" in each district.
tmp_map_1910 <- combined %>%
  filter(year == 1910) %>%
  filter(occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  group_by(enumdist) %>%
  summarise(count = n()) %>%
  # Districts without a matching record stay on the map with an NA count.
  left_join(x = shp_1910, y = ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(mapping = aes(fill = count), color = "black", size = 0.25) +
  labs(fill = "Count") +
  scale_fill_viridis_c() +
  theme(panel.grid = element_blank(),
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.position = "right",
        legend.key.width = unit(1, "line"),
        legend.key.height = unit(2, "line"))
# Lay the three yearly count maps out in a single row, tagging each panel
# with its census year.
ggarrange(plotlist = list(tmp_map_1850, tmp_map_1880, tmp_map_1910),
          nrow = 1,
          labels = c("1850", "1880", "1910"),
          font.label = list(family = "Quicksand", face = "plain", size = 12))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
Managers, officials, and proprietors (n.e.c.) Normalised
# 1850 ward-level map: "Managers, officials, and proprietors (n.e.c.)" as a
# percentage of all rows of combined_1850 in the ward, binned into five bands.
tmp_map_1850 <- combined_1850 %>%
  # Ward size is computed before filtering, so the denominator is every row
  # of combined_1850 in that ward.
  group_by(ward) %>%
  mutate(ward_size = n()) %>%
  filter(year == 1850,
         occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  # ward_size is constant within a ward; grouping by it carries the column
  # through summarise().
  group_by(ward, ward_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  mutate(pct_band = cut(count / ward_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1850, ., by = c("Ward_Num" = "ward")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the ward, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# 1880 enumeration-district map: "Managers, officials, and proprietors
# (n.e.c.)" as a percentage of all 1880 records in the district, binned into
# five bands.
tmp_map_1880 <- combined %>%
  filter(year == 1880) %>%
  # Denominator: every 1880 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  # The fourth band previously ended at 25, which contradicted its
  # "16 - 20" label and the cut-offs used by the 1850 and 1910 maps.
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1880, ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# 1910 enumeration-district map: "Managers, officials, and proprietors
# (n.e.c.)" as a percentage of all 1910 records in the district, binned into
# five bands.
tmp_map_1910 <- combined %>%
  filter(year == 1910) %>%
  # Denominator: every 1910 row of `combined` in the district.
  group_by(enumdist) %>%
  mutate(enum_size = n()) %>%
  filter(occ1950_label == "Managers, officials, and proprietors (n.e.c.)") %>%
  group_by(occ1950_label, enumdist, enum_size) %>%
  summarise(count = n()) %>%
  # Right-closed bands: (-Inf, 5], (5, 10], (10, 15], (15, 20], (20, 100].
  mutate(pct_band = cut(count / enum_size * 100,
                        breaks = c(-Inf, 5, 10, 15, 20, 100),
                        labels = c(" 0 - 5", " 6 - 10",
                                   "11 - 15", "16 - 20",
                                   "21 - 100"))) %>%
  left_join(shp_1910, ., by = c("ED" = "enumdist")) %>%
  ggplot() +
  geom_sf(aes(fill = pct_band),
          colour = "black",
          size = 0.25) +
  # drop = FALSE keeps all five bands in the legend even when a band is empty.
  scale_fill_viridis_d(drop = FALSE) +
  # The bands are percentages of the district, not head counts.
  labs(fill = "Percent") +
  theme(legend.position = "right",
        axis.text = element_blank(),
        strip.text.x = element_text(size = 7),
        legend.key.height = unit(2, "line"),
        legend.key.width = unit(1, "line"),
        panel.grid = element_blank())
# Lay the three yearly normalised maps out in a single row, tagging each
# panel with its census year.
ggarrange(plotlist = list(tmp_map_1850, tmp_map_1880, tmp_map_1910),
          nrow = 1,
          labels = c("1850", "1880", "1910"),
          font.label = list(family = "Quicksand", face = "plain", size = 12))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font width unknown for character 0x41